# Set optionsknitr::opts_chunk$set(echo =TRUE,warning =FALSE,message =FALSE,fig.align ='center',fig.retina =2)rm(list=ls())library(tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.5.2
Code
#library(table1)library(gt)library(survival)library(data.table)library(randomForest)library(grf)library(policytree)library(DiagrammeR)#library(grid)#library(forestploter)#library(randomizr)# library(devtools)# install_github("larry-leon/weightedsurv", force = TRUE)#install.packages("weightedsurv")# install_github("larry-leon/forestsearch", force = TRUE)library(forestsearch)library(weightedsurv)
Warning: package 'weightedsurv' was built under R version 4.5.2
Code
# Set theme for plotstheme_set(theme_minimal(base_size =12))
1 Summary
Reproducing main GBSG analysis
1.1 Datasetup
Code
df.analysis <- gbsgdf.analysis <-within(df.analysis,{id <-as.numeric(c(1:nrow(df.analysis))) # time to monthstime_months <- rfstime/30.4375grade3 <-ifelse(grade=="3",1,0)treat <- hormon})confounders.name <-c("age","meno","size","grade3","nodes","pgr","er")outcome.name <-c("time_months")event.name <-c("status")id.name <-c("id")treat.name <-c("hormon")
# NOTE: In general for GRF trees# leaf1 --> recommend control# leaf2 --> recommend treatment# Tree depth 1plot(grf_est1$tree1,leaf.labels=c("Control","Treat"))
Code
# Tree depth 2plot(grf_est1$tree2,leaf.labels=c("Control","Treat"))
1.4 Forestsearch with depth=2 (maxk = 2)
Code
# Setup parallel processinglibrary(doFuture)
Warning: package 'future' was built under R version 4.5.2
=== Two-Stage Consistency Evaluation Enabled ===
Stage 1 screening splits: 30
Maximum total splits: 1000
Batch size: 20
================================================
GRF stage for cut selection with dmin, tau = 12 0.6
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
GRF cuts identified: 3
Cuts: er <= 0, age <= 50, age <= 43
# of continuous/categorical characteristics 5 2
Continuous characteristics: age size nodes pgr er
Categorical characteristics: meno grade3
## Prior to lasso: age size nodes pgr er
#### Lasso selection results
7 x 1 sparse Matrix of class "dgCMatrix"
s0
age .
meno .
size 0.005433435
grade3 0.178139021
nodes 0.049670523
pgr -0.001812895
er .
Cox-LASSO selected: size grade3 nodes pgr
Cox-LASSO not selected: age meno er
### End Lasso selection
## After lasso: size nodes pgr
Default cuts included from Lasso: size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr)
Categorical after Lasso: grade3
Factors per GRF: er <= 0 age <= 50 age <= 43
Initial GRF cuts included er <= 0 age <= 50 age <= 43
Factors included per GRF (not in lasso) er <= 0 age <= 50 age <= 43
===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 16 cut expressions once and caching...
Cut evaluation summary:
Total cuts: 16
Valid cuts: 16
Errors: 0
✓ All 16 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====
# of candidate subgroup factors= 16
[1] "er <= 0" "age <= 50" "age <= 43" "size <= 29.3" "size <= 25"
[6] "size <= 20" "size <= 35" "nodes <= 5" "nodes <= 3" "nodes <= 1"
[11] "nodes <= 7" "pgr <= 110" "pgr <= 32.5" "pgr <= 7" "pgr <= 131.8"
[16] "grade3"
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 528
Events criteria: control >= 10 , treatment >= 10
Subgroup search completed in 0.01 minutes
Found 13 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 13
# of unique initial candidates: 13
# Restricting to top stop_Kgroups = 30
# of candidates to evaluate: 13
Algorithm: Two-stage sequential
Stage 1 splits: 30
Screen threshold: 0.763
Max total splits: 1000
Batch size: 20
Parallel processing: callr with 30 workers
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
Warning: package ‘future’ was built under R version 4.5.2
#output_dir <- "dev/vignettes-working/applications/gbsg/results"output_dir <-"results/"save_results <-dir.exists(output_dir)# File pre-fix for savingfileout_boot <-c("gbsg-k2_v4_B=100")fileout_cv <-c("gbsg-k2_v4_CV=20")# patchhwork needed for a combined bootstrap plot (otherwise if not avaialable will not produce)library(patchwork)# Number of bootstrap samplesNB <-100system.time({fs_bc <-forestsearch_bootstrap_dofuture(fs.est = fs, nb_boots = NB, show_three =FALSE, details =TRUE)})
=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 100
Event threshold: <12 events
ORIGINAL Subgroup H on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
ORIGINAL Subgroup Hc on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
NEW Subgroups found: 87 (87.0%)
NEW Subgroup H* on ORIGINAL data:
Control arm <12 events: 1 (1.1% of successful)
Treatment arm <12 events: 1 (1.1% of successful)
Either arm <12 events: 2 (2.3% of successful)
NEW Subgroup Hc* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 0 (0.0% of successful)
Either arm <12 events: 0 (0.0% of successful)
Code
summaries$diagnostics_table_gt
Bootstrap Diagnostics Summary
Analysis of 100 bootstrap iterations
Category
Metric
Value
Success Rate1
Total iterations
100
Successful subgroup ID
87 (87.0%)
Failed to find subgroup
13 (13.0%)
Success rating
Good ✓✓
Subgroup H (Questionable)
Unadjusted estimate
1.95 (1.04, 3.67)
Bias-corrected estimate
1.56 (0.71, 3.43)
Bias correction impact2
20.2%
CI width change3
2.64 -> 2.72
Subgroup Hc (Recommend)
Unadjusted estimate
0.61 (0.47, 0.80)
Bias-corrected estimate
0.64 (0.50, 0.81)
Bias correction impact2
3.4%
CI width change3
0.33 -> 0.32
Bootstrap Quality: H
Valid iterations
87
Mean (SD)
0.44 (0.40)
Coefficient of variation4
90.0%
Skewness5
0.01
Bootstrap Quality: Hc
Valid iterations
87
Mean (SD)
-0.45 (0.21)
Coefficient of variation4
45.5%
Skewness5
0.27
Search Performance
Mean max HR found
3.32 (1.37)
Mean factors evaluated
39.4
Mean combinations tried
832
Proportion at maxk
--
1Success Rate: Proportion of bootstrap samples where ForestSearch identified a valid subgroup
2Bias Correction Impact: Percentage change from unadjusted to bias-corrected estimate
3CI Width Change: Confidence interval width before -> after bias correction
4Coefficient of Variation: Standard deviation as % of mean (lower is better)
5Skewness: Measure of asymmetry (0 = symmetric, |skew| < 1 is generally good)
Interpretation Guide:
✓ Good stability: Subgroup is reliably identified in most bootstrap samples.
⚠ High variability: Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.
Code
summaries$subgroup_summary$original_agreement
Metric Value
<char> <char>
1: Total bootstrap iterations 100
2: Successful iterations 87
3: Failed iterations (no subgroup) 13
4: Exact match with original 5 (5.7%)
5: Different from original 82 (94.3%)
Cross-validation complete:
- Time: 0.31 minutes
- Subgroup found in 60 % of folds
Any found: 0.6
Exact match: 0.3666667
At least 1 match: 0.3666667
Cov 1 any: 0.3666667
Cov 2 any: NaN
Cov 1 and 2 any: 0
Cov 1 exact: 0.3666667
Cov 2 exact: NaN
Agreement (sens, ppv) in H and Hc: 0.5121951 0.9635762 0.65625 0.9356913
Code
# Reset workers to singleplan(sequential)summary_OOB <-forestsearch_KfoldOut(res=fs_OOB, details=TRUE, outall=TRUE)
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.7 0.4 0.4 0.4 NaN 0 0.4 NaN
[2,] 0.7 0.3 0.3 0.3 NaN 0 0.3 NaN
[3,] 0.8 0.3 0.3 0.3 NaN 0 0.3 NaN
[4,] 0.6 0.3 0.3 0.3 NaN 0 0.3 NaN
[5,] 0.5 0.2 0.2 0.2 NaN 0 0.2 NaN
[6,] 0.7 0.1 0.1 0.1 NaN 0 0.1 NaN